/*
 *  Copyright (C) 2011 Jaime Pavlich-Mariscal
 * 
 *  This program is free software: you can redistribute it and/or modify
 *  it under the terms of the GNU General Public License as published by
 *  the Free Software Foundation, either version 3 of the License, or
 *  (at your option) any later version.
 * 
 *  This program is distributed in the hope that it will be useful,
 *  but WITHOUT ANY WARRANTY; without even the implied warranty of
 *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 *  GNU General Public License for more details.
 * 
 *  You should have received a copy of the GNU General Public License
 *  along with this program.  If not, see <http://www.gnu.org/licenses/>.
 */
package cl.ucn.disc.biblio.refcluster.clustering;

import java.io.File;
import java.io.PrintWriter;
import java.lang.reflect.Field;
import java.util.ArrayList;
import java.util.Collection;
import java.util.Comparator;
import java.util.HashSet;
import java.util.Iterator;
import java.util.LinkedHashMap;
import java.util.LinkedHashSet;
import java.util.LinkedList;
import java.util.List;
import java.util.Map;
import java.util.PriorityQueue;
import java.util.Scanner;
import java.util.Set;
import java.util.TreeSet;

import org.apache.log4j.Logger;
import org.jgrapht.UndirectedGraph;
import org.jgrapht.alg.BronKerboschCliqueFinder;
import org.jgrapht.graph.DefaultEdge;
import org.jgrapht.graph.SimpleGraph;

import cl.ucn.disc.biblio.refcluster.control.ReferenceClustererController;
import cl.ucn.disc.biblio.refcluster.database.TupleOfStrings;
import cl.ucn.disc.biblio.refcluster.database.TupleOfStringsMatcher;
import cl.ucn.disc.biblio.refcluster.reference.Author;
import cl.ucn.disc.biblio.refcluster.reference.Reference;
import cl.ucn.disc.biblio.refcluster.reference.ReferenceManager;
import cl.ucn.disc.biblio.refcluster.reference.Sentence;
import cl.ucn.disc.biblio.refcluster.util.MultiMap;
import cl.ucn.disc.biblio.refcluster.util.OrderedMultiMap;
import cl.ucn.disc.biblio.refcluster.util.Utils;
import ds.tree.RadixTree;
import ds.tree.RadixTreeImpl;
import ds.tree.Visitor;

/** Contains all of the methods to cluster references and authors
 *
 * @author Jaime Pavlich-Mariscal
 */
public class ClusterManager {
	

    private static final Logger LOGGER = Logger.getLogger(ReferenceClustererController.class.getName());
    
    private ReferenceManager rm = new ReferenceManager();
    private final Comparator<Set<?>> setSizeComparator = new Comparator<Set<?>>() {

        @Override
        public int compare(Set<?> o1, Set<?> o2) {
            // Puts first the biggest sets and last the smallest sets
            if (o1.size() > o2.size()) {
                return -1;
            } else if (o1.size() < o2.size()) {
                return 1;
            }
            return 0;
        }
    };


	/** Creates a set of clusters of references.
	 * @param allRefs The references to cluster
	 * @return The set of clusters of similar references
	 */
	public Set<Set<Reference>> clusterReferences(Collection<Reference> allRefs) {
	    ReferenceClustererController.LOGGER.info("Processing references per year");
	    Set<Set<Reference>> referenceClusters = new HashSet<Set<Reference>>();
	    OrderedMultiMap<String, Reference> referencesPerYear = rm.getRefsPerYear(allRefs);
	
	    for (String year : referencesPerYear.keySet()) {
	        try {
				if (year == null) {
				    continue;
				}
				ReferenceClustererController.LOGGER.info("Year: " + year);

				Collection<Set<Author>> authorClustersPerYear = clusterAuthors(referencesPerYear.get(year));

				MultiMap<Author, Reference> referencesPerAuthor = getReferencesPerAttribute("author", referencesPerYear.get(year), authorClustersPerYear);

				for (Author author : referencesPerAuthor.keySet()) {
				    List<Reference> refs = new LinkedList<Reference>(referencesPerAuthor.get(author));

				    referenceClusters.addAll(findClusters(refs, Reference.JOURNAL_VOL_PAGE_MATCHER));
				}
			} catch (Exception e) {
				throw new RuntimeException(e);
			}
	
	    }
	    return referenceClusters;
	}

	/** Create a set of cluster of authors
	 * @param refs The set of references from which to extract the authors to be clustered
	 * @return The set of clusters of similar authors
	 */
	public Set<Set<Author>> clusterAuthors(Set<Reference> refs) {
		try {
			
			LOGGER.info("Reading authors");
	        @SuppressWarnings("unchecked")
			Set<Author> authors = Utils.projection(refs, Reference.class.getDeclaredField("author"), HashSet.class, Author.class);
	        RadixTree<Author> rt = new RadixTreeImpl<Author>();
	        for (Author author : authors) {
	        	rt.insert(author.getString(), author);
	        }
	        
	        UndirectedGraph<Author, DefaultEdge> g = createSuperstringGraph(authors, rt);
	        Set<Set<Author>> authorClusters = findMaxNonIntersectingAndSizeGreaterThanOneCliques(g);
	        return authorClusters;
		} catch (Exception e) {
			throw new RuntimeException(e);
		}
	}

    
    
	public <T extends TupleOfStrings> Set<Set<T>> findClusters(List<T> elementsToCluster, TupleOfStringsMatcher<T> matcher) {
        UndirectedGraph<T, DefaultEdge> g = createMatchGraph(elementsToCluster, matcher);
        Set<Set<T>> authorClusters = findMaxNonIntersectingAndSizeGreaterThanOneCliques(g);
        return authorClusters;
    }
	
	@SuppressWarnings("unchecked")
	public <T extends TupleOfStrings> Set<Set<T>> readClusters(File clusterFile, Class<T> tupleClass) throws Exception {
	    LOGGER.info("Opening cluster file " + clusterFile);
	
	    Set<Set<T>> clusters = new LinkedHashSet<Set<T>>();
	    Scanner s = new Scanner(clusterFile);
	
	    Set<T> cluster = new LinkedHashSet<T>();
	    clusters.add(cluster);
	    while (s.hasNextLine()) {
	       	String line = s.nextLine();
	       	if (line.trim().equals("")) {
	            // If the line is empty, it assumes that it is the end of the cluster
	            if (s.hasNextLine()) {
	            	// If still has more lines
	                cluster = new LinkedHashSet<T>();
	                clusters.add(cluster);
	            }
	       	} else {
	       		TupleOfStrings t = tupleClass.newInstance();
	       		t.parse(line.trim().replaceAll("\\s+", " "));
	            cluster.add((T) t);
	        }
	    }
	    s.close();
	    
	
	    return clusters;		
	}


	public <T extends TupleOfStrings> Map<String, Set<T>> createClusterMap(Collection<Set<T>> sentenceClusters) {
	    Map<String, Set<T>> clusterMap = new LinkedHashMap<String, Set<T>>();
	    for (Set<T> cluster : sentenceClusters) {
	        for (T s : cluster) {
	            clusterMap.put(s.getString(), cluster);
	        }
	    }
	    return clusterMap;
	}
    
    
    
    /**
     * Creates an undirected graph where the nodes are {@link TupleOfStrings} instances
     * and the edges connect two nodes if they match using
     * {@link TupleOfStringsMatcher#matches(cl.ucn.disc.biblio.refcluster.database.TupleOfStrings, cl.ucn.disc.biblio.refcluster.database.TupleOfStrings) }
     * @param <T>
     * @param tuples Set of objects t
     * @param matcher
     * @return
     */
    private <T extends TupleOfStrings> UndirectedGraph<T, DefaultEdge> createMatchGraph(List<T> tuples, TupleOfStringsMatcher<T> matcher) {
    	LOGGER.info("Creating match graph...");
        UndirectedGraph<T, DefaultEdge> g = new SimpleGraph<T, DefaultEdge>(DefaultEdge.class);
        for (T tuple : tuples) {
            g.addVertex(tuple);
        }
        for (int i = 0; i < tuples.size(); i++) {
            for (int j = i + 1; j < tuples.size(); j++) {
                T tuple1 = tuples.get(i);
                T tuple2 = tuples.get(j);
                // Creates an edge only if it does not exist and the matchables match, but are not equal
                // (i.e., avoids creation of self-referencing edges)
                if (g.getEdge(tuple1, tuple2) == null && matcher.matches(tuple1, tuple2) && !tuple1.equals(tuple2)) {
                    g.addEdge(tuple1, tuple2);
                }
            }
        }
        return g;
    }

    /**
     * Creates an undirected graph where the nodes are {@link TupleOfStrings} instances
     * and the edges connect two nodes if one is superstring of the other.
     * To improve speed, this method takes as argument a radix tree that indexes the strings of the
     * tuples.
     * TODO Optimize using  {@link Visitor}
     * @param <T>
     * @param tuples
     * @param radixTree
     * @return
     */
    

    private <T extends TupleOfStrings> UndirectedGraph<T, DefaultEdge> createSuperstringGraph(Collection<T> tuples, RadixTree<T> radixTree) {
    	LOGGER.info("Creating match graph using radix tree...");
        UndirectedGraph<T, DefaultEdge> g = new SimpleGraph<T, DefaultEdge>(DefaultEdge.class);
        
        // Creates vertexes
        for (T tuple : tuples) {
        	 g.addVertex(tuple);
        }
        
        // Creates edges
        for (T tuple : tuples) {
        	List<T> superstrings = radixTree.searchPrefix(tuple.getString(), Integer.MAX_VALUE);
        	for (T superstring : superstrings) {
        		if (!superstring.equals(tuple)) {
        			if (g.getEdge(tuple, superstring) == null) {
        				g.addEdge(tuple, superstring);
        			}
        		}
        	}
        }
        
        
        return g;
    }

    /**
     * Finds all of the maximal non-intersecting cliques, with size > 1.
     *
     * @param <T>
     * @param g
     * @return
     */
    private <T extends TupleOfStrings> Set<Set<T>> findMaxNonIntersectingAndSizeGreaterThanOneCliques(UndirectedGraph<T, DefaultEdge> g) {
    	LOGGER.info("Finding maximum clusters...");
        BronKerboschCliqueFinder<T, DefaultEdge> cf = new BronKerboschCliqueFinder<T, DefaultEdge>(g);
        Collection<Set<T>> cliques = cf.getAllMaximalCliques();

        // Obtain only those cliques that do not intersect with others, to avoid
        // ambiguities in author/journal names
        Set<Set<T>> clusters = findNonIntersectingCliques(cliques);

        // Removes clusteres with one element
        Iterator<Set<T>> it = clusters.iterator();
        while (it.hasNext()) {
            Set<T> cluster = it.next();
            if (cluster.size() <= 1) {
                it.remove();
            }
        }

        return clusters;
    }

    private <T extends TupleOfStrings> Set<Set<T>> findNonIntersectingCliques(Collection<Set<T>> cliques) {
        // Creates a graph of intersecting cliques
        UndirectedGraph<Set<T>, DefaultEdge> g = createMatchableSetIntersectionGraph(cliques);

        // Sorts cliques by size, from larger to smaller.
        PriorityQueue<Set<T>> sortedCliques = new PriorityQueue<Set<T>>(10, setSizeComparator);
        sortedCliques.addAll(g.vertexSet());

        // Adds to the result collection all of those cliques that: (a) have no
        // other intersecting cliques or
        // (b) are the biggest among their intersecting cliques
        Set<Set<T>> result = new LinkedHashSet<Set<T>>();
        Set<Set<T>> excludedCliques = new LinkedHashSet<Set<T>>();
        for (Set<T> s : sortedCliques) {
            if (g.edgesOf(s).isEmpty()) {
                result.add(s);
            } else if (!excludedCliques.contains(s)) {
                result.add(s);
                for (DefaultEdge edge : g.edgesOf(s)) {
                    Set<T> s1 = g.getEdgeSource(edge);
                    Set<T> s2 = g.getEdgeTarget(edge);
                    if (s1 != s) {
                        excludedCliques.add(s1);
                    }
                    if (s2 != s) {
                        excludedCliques.add(s2);
                    }
                }
            }
        }
        return result;
    }

    /**
     * Creates a graph where nodes are sets of {@link TupleOfStrings} and edges between two
     * sets indicate that they intersect.
     *
     * @param <T>
     * @param sets
     * @return An undirected graph
     */
    private <T extends TupleOfStrings> UndirectedGraph<Set<T>, DefaultEdge> createMatchableSetIntersectionGraph(Collection<Set<T>> collectionOfSets) {
        List<Set<T>> sets = new ArrayList<Set<T>>(collectionOfSets);
        UndirectedGraph<Set<T>, DefaultEdge> g = new SimpleGraph<Set<T>, DefaultEdge>(DefaultEdge.class);
        for (Set<T> s : sets) {
            g.addVertex(s);
        }
        for (int i = 0; i < sets.size(); i++) {
            for (int j = i + 1; j < sets.size(); j++) {
                Set<T> s1 = sets.get(i);
                Set<T> s1Tmp = new TreeSet<T>(s1);
                Set<T> s2 = sets.get(j);
                s1Tmp.retainAll(s2);
                if (!s1Tmp.isEmpty()) {
                    g.addEdge(s1, s2);
                }
            }
        }
        return g;
    }


    private OrderedMultiMap<Author, Reference> getReferencesPerAttribute(String fieldName, Set<Reference> refs, Collection<Set<Author>> clusters) {
        Map<String, Set<Author>> stringToClusterMap = createClusterMap(clusters);

        OrderedMultiMap<Author, Reference> refsPerAttribute = new OrderedMultiMap<Author, Reference>();
        for (Reference r : refs) {
            String value;
            try {
            	
                Field field = Reference.class.getDeclaredField(fieldName);
                field.setAccessible(true);
				value = (String) field.get(r);
            } catch (Exception e) {
                throw new RuntimeException(e);
            }
            if (stringToClusterMap.get(value) != null) {
                refsPerAttribute.add(stringToClusterMap.get(value).iterator().next(), r);
            }
        }
        return refsPerAttribute;
    }

	public void writeReferenceClusters(Set<Set<Reference>> clusters, PrintWriter out) {

	    Comparator<Set<Reference>> comparator = new Comparator<Set<Reference>>() {
	
	        @Override
	        public int compare(Set<Reference> s1, Set<Reference> s2) {
	            if (s1 == s2 || (s1 != null && s2 != null && s1.isEmpty() && s2.isEmpty())) {
	                return 0;
	            }
	
	            if (s1 == null) {
	                return -1;
	            }
	            if (s2 == null) {
	                return 1;
	            }
	            Reference r1 = rm.createSuperReference(s1);
	            Reference r2 = rm.createSuperReference(s2);
	            String str1 = r1.getAuthor() + r1.getYear() + r1.getJournal();
	            String str2 = r2.getAuthor() + r2.getYear() + r2.getJournal();
	            return str1.compareTo(str2);
	        }
	    };

	
	
	    writeClusters(clusters, out, comparator);
	}

	public <T extends TupleOfStrings> void writeClusters(Set<Set<T>> clusters, PrintWriter out, Comparator<Set<T>> comparator) {
	    LOGGER.info("Writing clusters to file");
		Set<Set<T>> orderedClusters;
		if (comparator == null) {
			orderedClusters = clusters;
		} else {
			orderedClusters = new TreeSet<Set<T>>(comparator);
			for (Set<T> cluster : clusters) {
				orderedClusters.add(cluster);
		
		    }
		}
	
	    for (Set<T> cluster : orderedClusters) {
	        // Reverse-order references
	        Set<T> orderedTuples = new TreeSet<T>();
	        orderedTuples.addAll(cluster);
	        for (T t : orderedTuples) {
	            out.println(t.getUnprocessedString());
	        }
	        out.println();
	    }
	    out.flush();
	    
	}

	public void writeAuthorClusters(Set<Set<Author>> clusters, PrintWriter out) {
		 Comparator<Set<Author>> comparator = new Comparator<Set<Author>>() {

			@Override
			public int compare(Set<Author> o1, Set<Author> o2) {
				if (o1 == null) {
					if (o2 == null) {
						return 0;
					} else {
						return -1;
					}
				} else if (o2 == null) {
					return 1;
				} else if (o1.isEmpty()) {
					if (o2.isEmpty()) {
						return 0;
					} else {
						return -1;
					}
					
				} else if (o2.isEmpty()) {
					return 1;
				} else {
					return o1.iterator().next().getString().compareTo(o2.iterator().next().getString());
				}
				
			}
			 
			 
		 };
		writeClusters(clusters, out, comparator);
		
	}

	public <T extends TupleOfStrings> Map<T, String> getReplacementsFromClusters(Collection<Set<T>> clusters) {
	    LOGGER.info("Defining new names for references");
	
	    Map<T, String> replacements = new LinkedHashMap<T, String>();
	    for (Set<T> cluster : clusters) {
	        Iterator<T> it = cluster.iterator();
	        T replacement = getLongestString(cluster);
	        while (it.hasNext()) {
	            replacements.put(it.next(), replacement.getString());
	        }
	    }
	    
	
	    return replacements;
	}

	private <T extends TupleOfStrings> T getLongestString(Set<T> cluster) {
		T longest = null;
		int longestLength = 0;
		for (Iterator<T> it = cluster.iterator(); it.hasNext(); ) {
			T t = it.next();
			if (t.getString().length() > longestLength) {
				longestLength = t.getString().length();
				longest = t;
			}
		}
		return longest;
	}
	
}
